import pandas.io.data as web
import seaborn as sns
from sqlalchemy import create_engine
import datetime
import pandas as pd
from pygments import highlight
from pygments.lexers.sql import SqlLexer
from pygments.formatters import HtmlFormatter, LatexFormatter
from IPython import display
import functools as ft
import matplotlib.pyplot as plt
import scipy as sp
import scipy.interpolate
from __future__ import division
import numpy as np
import matplotlib.dates as mdates
from scipy.optimize import minimize
from matplotlib.finance import candlestick2_ohlc
from datetime import date
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
CONNECTION_STRING = 'mssql+pymssql://IVYuser:resuyvi@vita.ieor.columbia.edu'
# Gets the database connection
def get_connection():
engine = create_engine(CONNECTION_STRING)
return engine.connect()
# Query database and return results in dataframe
def query_dataframe(query, connection=None):
if connection is None:
connection = get_connection()
return pd.read_sql(query, connection)
# Query database using external file and return results in dataframe
def query_dataframe_f(filename, connection=None):
if connection is None:
connection = get_connection()
with open(filename, 'r') as handle:
return pd.read_sql(handle.read(), connection)
# Print sql query and query results
def print_and_query(filename, connection=None, use_latex=False):
if connection is None:
connection = get_connection()
with open(filename, 'r') as handle:
sql = handle.read()
if use_latex:
display_obj = display.Latex(highlight(
sql, SqlLexer(), LatexFormatter()))
else:
formatter = HtmlFormatter()
display_obj = display.HTML(
'<style type="text/css">{}</style>{}'
.format(
formatter.get_style_defs('.highlight'),
highlight(sql, SqlLexer(), formatter)))
display.display(
display_obj,
pd.read_sql(sql, connection)
)
Recall in the first lecture we discussed the possible impact of a large trade. Having a minute database allows us to examine the consequences of such a trade on a finer scale. Pick a stock from the LiveVol database and find the largest options trade to occur at least one week removed from an Earnings date and at least two times larger than any trade for the subsequent 3 days. We wish to examine minute by minute over the 3-day period.
a) Generate an implied volatility surface for your choice just prior to the large trade. If space is an issue concentrate on a surface centered about the strike and series of the large trade
b) Follow the vol surface minute by minute after the large trade
c) Can you characterize a relaxation time scale for the vol surface
d) Propose a possible trading scheme for a high frequency trader to monetize a future disturbance of this kind
In the LiveVol database we have trade tick data and minute by minute calcualations for Apple, Google and Tesla stocks options in October 2013. We looked at all stocks and analyzed the largest option trades at least one week removed from an earnings date.
In class we saw an interesting example where the whole volatility surface of the stock FDC shifted significantly downwards due to a large option trade. We were hoping that we would be able to find a similar impact from a trade in the tick database but unfortunately we were not able to. That might be due to the fact that all of the stocks we have data for are very liquid so that we would need an extremely large trade to have an obvious and lasting impact on the dynamics of the surface. Since we only have data for one month it might be the case that none of the trades for our stocks in October were large enough to have an impact on price levels or spreads. At least not an impact that is observable for more than a minute.
c)
If we had found a trade that clearly impacted the surface we could have characterized a relaxation time by observing how long it took the surface to revert back to it's "regular" state from before the trade. Since we can't observe a disturbance in the volatilty suraface we can think of two possibilities. If there is a temporary disturbance in the surface due to a large trade it either subdues in less than a minute or the trade needs to be larger relative to average trade size than any of the examples we have observed to appear in the first place.
d)
If one can reliably identify a relaxtion time for the surface of a particular stock and the relaxation time is long enough so that trades can be made before the surface reverts to it's normal state then a simple trading scheme would be take a mean-reverting position. A trader could monitor the options on the stock and whenever he sees a large trade that he expects will disturb the volatility surface he can go short (long) implied volatility on options with the same or similar strike and expiration if he expects the disturbance to be an upwards (downwards) shift. Then close the position after the surface has reverted to it's normal state. Although he would always have to be aware of other events surrounding the options such as earnings, expirations, etc.
Closest earnings date: 2013-10-28
Analysis Date Range: 2013-1-1 - 2013-10-21
# Get 20 largest trades for Apple
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'AAPL'
and timestamp < '2013-10-21'
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
apple_trades = query_dataframe(q)
apple_trades
The largest trade occured at 12:32 on October 9th and was for 5000 call option contracts with expiration 2013-11-16 and strike 530. However, it's not two times larger than any trade for the following three days since the second (4500) and third (3021) largest trades occur the next day. Looking at the three trading days following those trades we can see that they don't fulfil the criteria either. Since there are no trades that match the criteria in the problem description we will just analyze the largest trade of the month.
# Get volatility surface data
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'AAPL'
and root = 'AAPL'
and timestamp between '2013-10-09 12:25' and '2013-10-09 12:40'
and optionType = 'c'
and iv > 0.05
and strike between 450 and 550
and datediff(day,convert(date,[timestamp]),expiration) < 100
order by timestamp, strike, expiration
'''
# Display minute-by-minute date around trade (K=530,T=2013-11-16)
apple_large = query_dataframe(q)
mask = (apple_large.strike == 530) & (apple_large.ExpD == 38)
apple_large.loc[mask,:]
Interestingly it seems like the trade had no discernable impact, at least not on it's own series (K=530,T=2013-11-16). There is almost no trading activity before or after the trade, the price is similar and the spread does not change after trade (stays at around 10 cents).
The trade price is lower than the best bid so it looks like someone is selling these contracts and since there is no observable impact we can conclude that this trade was simply not large enough to have an impact on the market. In other words there is enough liquidity in Apple to handle this transaction. However, we must keep in mind that our data is only on a minute-by-minute scale and there might have been a short term impact on the bid ask spread which was resolved in less than a minute, but we can't analyze that using the data we have access to.
We plot the volatility surface centered around the strike and expiration of our option from 12:25 to 12:40, as is expected there is no observable impact from the trade. If there was an impact we should have seen it occuring at 12:33 as shift or twist in the surface, but the surface stays in a rigid structure throughout the whole period.
# Plot minute-by-minute volatility surface
for idx, row in apple_large.groupby('timestamp'):
# 2D grid construction
spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
xi = np.linspace(min(row.strike), max(row.strike))
yi = np.linspace(min(row.ExpD), max(row.ExpD))
X, Y = np.meshgrid(xi, yi)
# 3D interpolation
Z = spline(X,Y)
fig = plt.figure(figsize=(20, 8))
ax = fig.gca(projection='3d')
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('Strike')
ax.set_ylabel('Time-to-maturity')
ax.set_zlabel('Implied volatility')
ax.set_zlim([0,1])
plt.title(idx)
Closest earnings date: 2013-10-17
Analysis Date Range: 2013-1-1 - 2013-10-10 and 2013-10-24 - 2013-10-31
# Get 20 largest trades for Google
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'GOOG'
and (timestamp < '2013-10-10' or timestamp > '2013-10-24')
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
google_trades = query_dataframe(q)
google_trades
The largest trade of October is shared between two trades and it looks like they are a part of a spread. The trade occured at 12:00 on October 25th and was for 1482 put option contracts expiring at the end of that day with strikes 1015 and 1025. Since the trade occured at the expiration date of the options we can reasonably assume that in this case someone is closing out a spread position and it's unlikely that the trade had an impact on the market. No other smaller trades in the month fulfil the criteria we are looking for so we will go forward and analyze the activity around this trade.
# Get volatility surface data
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'GOOG'
and root = 'GOOG'
and timestamp between '2013-10-25 11:50' and '2013-10-25 12:10'
and optionType = 'p'
and strike between 900 and 1100
and datediff(day,convert(date,[timestamp]),expiration) < 100
and iv > 0.05
order by timestamp, strike, expiration
'''
# Display minute-by-minute date around trade (K=1025,T=2013-10-25)
google_large = query_dataframe(q)
mask = (google_large.strike == 1015) & (google_large.ExpD == 0)
mask2 = (google_large.strike == 1025) & (google_large.ExpD == 0)
google_large.loc[mask,:].append(google_large.loc[mask2,:])
The table above show the price and volatility data for both options around the trade (K=1015,T=2013-11-25) and (K=1025,T=2013-11-25). It looks like this trade is a part of a larger trade that is spread out into parts from 11:58 to 12:01 for around 2000 contracts. We can't see the whole order book for the options but since the bid and ask sizes are so low and it's so close to expiration we can assume that the option is not very liquid. Therefore, it is not surprising to see these big trades temporarly affect the option prices (bid/ask spread) since the trades are likely clearing up a big portion of the order book. After the trades we can observe that the option prices are gradually climbing but that is probably due to the fact that the stock price is dropping and since the option is really close to expiration it's price is very sensitive to all underlying price changes. At least it is hard to attribute it to this trade with any confidence and there is no clear disturbance followed by a relaxation. It is more informative to look at the big picture, the whole volatility surface, to determine if the trade had an effect on the dynamics of the stock options that day.
We plot the volatility surface centered around the strike and expiration of our option from 10:50 to 12:10, there does seem to be some action in the shorter maturity 1000 to 1100 strikes but after closer inspection these changes are simply due to missing data. Calculations are missing from the database on all timestamps where the surface is flat on those regions. Example: All data available at 11:53 but K=[1000,1100] has missing data on 11:54. Taking this into account we can't spot any impact obvious impacts on the dynamics of the surface from the trades. The minute-by-minute dynamics are similar to those we observed when looking at it in the three days leading up to the trade.
# Plot minute-by-minute volatility surface
for idx, row in google_large.groupby('timestamp'):
# 2D grid construction
spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
xi = np.linspace(min(row.strike), max(row.strike))
yi = np.linspace(min(row.ExpD), max(row.ExpD))
X, Y = np.meshgrid(xi, yi)
# 3D interpolation
Z = spline(X,Y)
fig = plt.figure(figsize=(15, 8))
ax = fig.gca(projection='3d')
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('Strike')
ax.set_ylabel('Time-to-maturity')
ax.set_zlabel('Implied volatility')
ax.set_zlim([0,2])
plt.title(idx)
Closest earnings date: 2013-11-5
Analysis Date Range: 2013-1-1 - 2013-10-29
# Get 20 largest trades for Tesla
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'TSLA'
and timestamp < '2013-10-29'
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
tesla_trades = query_dataframe(q)
tesla_trades
It looks like we have the same case as for Google for the largest trade, it is shared between two trades and it looks like it is someone closing out a spread trade. It occured on October 10th at 10:25 and was for 2237 call option contracts at strikes 180 and 185. In addition, it seems like the 6 largest trades in October are all spread trades. We don't expect these spread trades to have a noticable impact on the market or surface dynamics but since no other smaller trades in the month fulfil the criteria we are looking for so we will go forward and analyze the activity around this trade.
# Get volatility surface data
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'TSLA'
and timestamp between '2013-10-10 10:20' and '2013-10-10 10:35'
and optionType = 'c'
and strike between 150 and 250
and datediff(day,convert(date,[timestamp]),expiration) < 100
and iv > 0.05
order by timestamp, strike, expiration
'''
# Display minute-by-minute date around trade (K=185,T=2013-10-11)
tesla_large = query_dataframe(q)
tesla_large.head()
mask = (tesla_large.strike == 180) & (tesla_large.ExpD == 1)
mask2 = (tesla_large.strike == 185) & (tesla_large.ExpD == 1)
tesla_large.loc[mask,:].append(tesla_large.loc[mask2,:])
The table above show the price and volatility data for both options around the trade (K=180,T=2013-10-11) and (K=1025,T=2013-10-11). The analysis for Google pretty much applies here as well, we have a spread of options close to expiry and large trades. The trades do seem to affect the option bid/ask spreads although less here than in the Google case. But there is no clear period of disturbance followed by a relaxation.
We plot the volatility surface centered around the strike and expiration of our option from 10:25 to 10:40. Yet again the volatility surface shows no disturbance around the trade (10:26).
# Plot minute-by-minute volatility surface
for idx, row in tesla_large.groupby('timestamp'):
# 2D grid construction
spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
xi = np.linspace(min(row.strike), max(row.strike))
yi = np.linspace(min(row.ExpD), max(row.ExpD))
X, Y = np.meshgrid(xi, yi)
# 3D interpolation
Z = spline(X,Y)
fig = plt.figure(figsize=(15, 8))
ax = fig.gca(projection='3d')
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('Time-to-maturity')
ax.set_ylabel('Strike')
ax.set_zlabel('Implied volatility')
ax.set_zlim([0,2])
plt.title(idx)